Conversation

@mwarusz
Member

@mwarusz mwarusz commented Apr 8, 2025

Adds wind forcing and bottom drag tendency terms. Includes some refactoring of OceanTestCommon.h to make it easier to test 1D fields.

This PR also partially addresses #178 because it removes the Default suffix from all auxiliary variables. However, I did not rename SshCell to just Ssh. @xylar
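
For orientation, here is a sketch of the two new tendency terms in their usual continuous, layered-model form (a sketch only, not necessarily the exact discrete expressions implemented here; $\rho_0$ corresponds to the Density0 option and $C_D$ to the BottomDragCoeff option discussed later in this thread):

$$
\left.\frac{\partial u_e}{\partial t}\right|_{\text{wind}} = \frac{\tau_e}{\rho_0\, h_e},
\qquad
\left.\frac{\partial u_e}{\partial t}\right|_{\text{drag}} = -\,C_D\,\frac{|\mathbf{u}|\, u_e}{h_e},
$$

where $u_e$ is the velocity normal to an edge, $\tau_e$ the component of the surface stress normal to that edge, and $h_e$ the layer thickness at the edge.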

Checklist

  • Documentation:
    • User's Guide has been updated
    • Developer's Guide has been updated
    • Documentation has been built locally and changes look as expected
  • Testing
    • CTest unit tests for new features have been added per the approved design.
    • Polaris tests for new features have been added per the approved design (and included in a test suite)
    • Unit tests have passed. Please provide a relevant CDash build entry for verification.
    • Polaris test suite has passed

@mark-petersen

Passes CTests on Perlmutter and Frontier, CPU and GPU.

build and run sequence:

######### Frontier cpu ############
export CODEDIR=opr
export RUNDIR=test_omega_gnu
mkdir /lustre/orion/cli115/scratch/mpetersen/runs/$RUNDIR
cd !$

cd /ccs/home/mpetersen/repos/E3SM/${CODEDIR}
git submodule update --init --recursive externals/YAKL externals/ekat externals/scorpio cime
cd /lustre/orion/cli115/scratch/mpetersen/runs/$RUNDIR

module load cmake
rm -rf build
mkdir build
cd build

# compiler options are:
export compiler=craycray
#export compiler=craygnu
#export compiler=crayamd

export PARMETIS_ROOT=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_6_0_${compiler}_mpich/var/spack/environments/dev_polaris_0_6_0_${compiler}_mpich/.spack-env/view

cmake \
   -DOMEGA_CIME_COMPILER=${compiler} \
   -DOMEGA_PARMETIS_ROOT=${PARMETIS_ROOT}\
   -DOMEGA_BUILD_TYPE=Release \
   -DOMEGA_CIME_MACHINE=frontier \
   -DOMEGA_VECTOR_LENGTH=1 \
   -DOMEGA_BUILD_TEST=ON \
   -Wno-dev \
   -S /ccs/home/mpetersen/repos/E3SM/${CODEDIR}/components/omega -B .
# note OMEGA_VECTOR_LENGTH=8 fails MPI tests on CPUs.
./omega_build.sh

# linking:
cd test
ln -isf ~/meshes/omega/O*nc .
cp /ccs/home/mpetersen/repos/E3SM/${CODEDIR}/components/omega/configs/Default.yml omega.yml

# -S is number of GPUs: Count of Specialized Cores per node

salloc -A cli115 -J inter -t 2:00:00 -q debug -N 1 -S 0
cd /lustre/orion/cli115/scratch/mpetersen/runs/$RUNDIR/build
./omega_ctest.sh

######### Frontier gpu #########
export CODEDIR=opr
export RUNDIR=test_omega_gpu_dev
mkdir /lustre/orion/cli115/scratch/mpetersen/runs/$RUNDIR
cd !$

cd /ccs/home/mpetersen/repos/E3SM/${CODEDIR}
git submodule update --init --recursive externals/YAKL externals/ekat externals/scorpio cime
cd /lustre/orion/cli115/scratch/mpetersen/runs/$RUNDIR

module load cmake
rm -rf build
mkdir build
cd build

# compiler options are:
export compiler=craycray-mphipcc 
#export compiler=craygnu-mphipcc  # 250422 error on cmake, module cmake, git missing
#export compiler=crayamd-mphipcc  

export PARMETIS_ROOT=/ccs/proj/cli115/software/polaris/frontier/spack/dev_polaris_0_6_0_${compiler}_mpich/var/spack/environments/dev_polaris_0_6_0_${compiler}_mpich/.spack-env/view

#module load Core/24.07
#module load cmake/3.27.9 git/2.45.1
cmake \
   -DOMEGA_CIME_COMPILER=${compiler} \
   -DOMEGA_PARMETIS_ROOT=${PARMETIS_ROOT}\
   -DOMEGA_BUILD_TYPE=Release \
   -DOMEGA_CIME_MACHINE=frontier \
   -DOMEGA_BUILD_TEST=ON \
   -DOMEGA_VECTOR_LENGTH=1 \
   -Wno-dev \
   -Wno-deprecated \
   -S /ccs/home/mpetersen/repos/E3SM/${CODEDIR}/components/omega -B .
./omega_build.sh

# linking:
cd test
ln -isf ~/meshes/omega/O*nc .
cp /ccs/home/mpetersen/repos/E3SM/${CODEDIR}/components/omega/configs/Default.yml omega.yml


salloc -A cli115 -J inter -t 2:00:00 -q debug -N 1 -p batch
# -p is partition name
cd /lustre/orion/cli115/scratch/mpetersen/runs/$RUNDIR/build
./omega_ctest.sh


######### perlmutter CPU
export CODEDIR=opr
export RUNDIR=test_omega_cpu
#export CODEDIR=omega-develop

cd /global/homes/m/mpeterse/repos/E3SM/${CODEDIR}
#git fetch
#git reset --hard origin/develop
git submodule update --init --recursive externals/ekat externals/scorpio cime
cd components/omega/

module load cmake
mkdir ${PSCRATCH}/runs/$RUNDIR
cd !$

rm -rf build
mkdir build
cd build

# compiler options are:
export compiler=gnu
#export compiler=nvidia # not working 250421

export PARMETIS_ROOT=/global/cfs/cdirs/e3sm/software/polaris/pm-cpu/spack/dev_polaris_0_6_0_${compiler}_mpich/var/spack/environments/dev_polaris_0_6_0_${compiler}_mpich/.spack-env/view

# nvidia or gnu compiler:
cmake \
   -DOMEGA_CIME_COMPILER=${compiler} \
   -DOMEGA_BUILD_TYPE=Release \
   -DOMEGA_CIME_MACHINE=pm-cpu \
   -DOMEGA_PARMETIS_ROOT=${PARMETIS_ROOT}\
   -DOMEGA_BUILD_TEST=ON \
   -DOMEGA_VECTOR_LENGTH=1 \
   -Wno-dev \
   -S /global/homes/m/mpeterse/repos/E3SM/${CODEDIR}/components/omega -B .
# note OMEGA_VECTOR_LENGTH=8 fails MPI tests on CPUs.
./omega_build.sh

# linking:
cd test
ln -isf /global/homes/m/mpeterse/meshes/omega/O*nc .
cp /global/homes/m/mpeterse/repos/E3SM/${CODEDIR}/components/omega/configs/Default.yml omega.yml

# run test:
salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=m4572 # or e3sm
cd ${PSCRATCH}/runs/${RUNDIR}/build

./omega_ctest.sh

also:
./omega_ctest.sh -R REDUCTIONS_TEST

# does not pass REDUCTIONS_TEST
# output in Testing/Temporary/LastTest.log
# old, gnu:
# export PARMETIS_ROOT=/global/cfs/cdirs/e3sm/software/polaris/pm-cpu/spack/dev_polaris_0_3_0_gnu_mpich/var/spack/environments/dev_polaris_0_3_0_gnu_mpich/.spack-env/view

######### perlmutter GPU
salloc --nodes 4 --qos interactive --time 01:00:00 --constraint gpu --tasks-per-node=2 --gpus-per-task 1 --account=m4572_g # or e3sm_g

# perlmutter has nodes with either 40 or 80 gb of high bandwidth memory, and the system defaults to 40. You can ask for 80 gb nodes with the sbatch flag --constraint="gpu&hbm80gb"
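# untested variant of the salloc above, requesting the 80 GB GPU nodes:
#   salloc --nodes 4 --qos interactive --time 01:00:00 --constraint="gpu&hbm80gb" --tasks-per-node=2 --gpus-per-task 1 --account=m4572_g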

export CODEDIR=opr
export RUNDIR=test_omega_gpu
mkdir ${PSCRATCH}/runs/$RUNDIR
cd !$

rm -rf build
mkdir build
cd build
module load cmake

# compiler options are:
export compiler=gnugpu
#export compiler=nvidiagpu

export PARMETIS_ROOT=/global/cfs/cdirs/e3sm/software/polaris/pm-gpu/spack/dev_polaris_0_6_0_${compiler}_mpich/var/spack/environments/dev_polaris_0_6_0_${compiler}_mpich/.spack-env/view
cmake \
   -DOMEGA_CIME_COMPILER=${compiler} \
   -DOMEGA_BUILD_TYPE=Release \
   -DOMEGA_CIME_MACHINE=pm-gpu \
   -DOMEGA_PARMETIS_ROOT=${PARMETIS_ROOT}\
   -DOMEGA_BUILD_TEST=ON \
   -Wno-dev \
   -DOMEGA_MPI_ON_DEVICE:BOOL=OFF \
   -S /global/homes/m/mpeterse/repos/E3SM/${CODEDIR}/components/omega -B .
# needed for compiler bug: OMEGA_MPI_ON_DEVICE:BOOL=OFF. See https://github.com/E3SM-Project/Omega/issues/214
./omega_build.sh

# linking:
cd test
ln -isf /global/homes/m/mpeterse/meshes/omega/O*nc .
cp /global/homes/m/mpeterse/repos/E3SM/${CODEDIR}/components/omega/configs/Default.yml omega.yml

cd ..
./omega_ctest.sh

| HTracersEdge | thickness-weighted tracers used for fluxes through edges. May be centered, upwinded or a combination of the two
| Del2TracersCell | laplacian of tracers on cells
| ZonalStressEdge | zonal component of wind stress on cells
| MeridStressEdge | meridional component of wind stress on cells

I think these should be ZonalStressCell and MeridStressCell, no? Later in the code these are defined on cells.

Also, I think having wind in the name would be good to differentiate from other potential sources of stress?

I think that defining it on cells makes sense, given that eventually these fields are coupled on cell centers.

I think in one of our recent meetings we decided to keep the stress generic. It could be helpful to specify that it is at the top or surface, though. Stresses from sea ice and wind are currently not separable in MPAS-Ocean when they are passed from the coupler. We could decide to change this, of course.

Collaborator

@cbegeman I don't understand your second comment. It's not clear to me what externally forced bottom stresses there may be (perhaps wave radiation stress is one).

I didn't mean to imply that we would have externally forced bottom stresses. Just that I think we're using these variables to refer only to stresses at the surface, but the variable name is quite generic.

Collaborator

Oh, I see. Thanks for the clarification. Appending Surface does make sense for clarity. And I fully agree, these should be on cells, not edges.

Member Author

> I think these should be ZonalStressCell and MeridStressCell, no? Later in the code these are defined on cells.

Yes, thanks for catching this.

To clarify how I came up with the names: in the code these variables are part of the WindForcingAuxVars class. For me this means that the full name of ZonalStressCell is really WindForcingAuxVars.ZonalStressCell, and that makes it clear that this variable refers only to the wind stress (at the surface).

However, I see how this can be confusing, especially since the user docs do not mention the groupings of auxiliary variables. Moreover, there are no class prefixes in stream field names, so currently the name of ZonalStressCell used for I/O is WindStressZonal.

MeshScalingDel4(Mesh->MeshScalingDel4), EdgeMask(Mesh->EdgeMask) {}

WindForcingOnEdge::WindForcingOnEdge(const HorzMesh *Mesh)
    : SaltWaterDensity(1.026e3) {}

Is this meant to be the reference ocean density? If so, I think it should come from the namelist.

Member Author

I changed this variable to come from the namelist:

Tendencies:
    Density0: 1026.0

However, it seems to me that using the reference ocean density in this term is related to the Boussinesq approximation. We might want to change this.

| Del2TracersCell | laplacian of tracers on cells
| ZonalStressEdge | zonal component of wind stress on cells
| MeridStressEdge | meridional component of wind stress on cells
| NormalStressEdge | normal component of wind stress on edge
Collaborator

generic comment - do we want this in AuxState? Would it make sense to have a 'forcing module' where this lives?

Member Author

These are grouped in the WindForcingAuxVars class, so they aren't directly members of the aux state class. This is mentioned in the dev docs, but maybe it should also be included in the user docs.

Comment on lines 21 to 34
   KOKKOS_FUNCTION void computeVarsOnEdge(int IEdge, int KChunk) const {
      if (KChunk == 0) {
         const int JCell0 = CellsOnEdge(IEdge, 0);
         const int JCell1 = CellsOnEdge(IEdge, 1);
         const Real ZonalStressEdge =
             0.5_Real * (ZonalStressCell(JCell0) + ZonalStressCell(JCell1));
         const Real MeridStressEdge =
             0.5_Real * (MeridStressCell(JCell0) + MeridStressCell(JCell1));

         NormalStressEdge(IEdge) =
             Kokkos::cos(AngleEdge(IEdge)) * ZonalStressEdge +
             Kokkos::sin(AngleEdge(IEdge)) * MeridStressEdge;
      }
   }

Should this be a more general edge reconstruction routine located elsewhere?

Also, this version is the anisotropic one we've moved away from for MPAS-Ocean. See E3SM-Project#6917.

Member Author

I implemented the isotropic version and created a functor that can do both variants in HorzOperators.h. I added a namelist option to choose:

  WindStress:
    InterpType: Isotropic
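
As an aside on how such an option has to be wired up somewhere (purely illustrative, not Omega's actual Config code): the InterpType string from the namelist needs to be mapped onto the InterpCellToEdgeOption enum, ideally with an explicit default so the functor never sees an unset value.

#include <string>

// Illustrative helper only (hypothetical, not part of this PR): map the
// InterpType config string onto the enum, defaulting to Isotropic for
// missing or unrecognized values.
enum class InterpCellToEdgeOption { Isotropic, Anisotropic };

inline InterpCellToEdgeOption parseInterpType(const std::string &Name) {
   if (Name == "Anisotropic")
      return InterpCellToEdgeOption::Anisotropic;
   return InterpCellToEdgeOption::Isotropic; // default
}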

   // Compute bottom drag
   if (LocBottomDrag.Enabled) {
      parallelFor(
          {NEdgesAll, NChunks}, KOKKOS_LAMBDA(int IEdge, int KChunk) {
Collaborator

not sure why we need a loop over NChunks. I guess if Omega-0 is planning to do wind with multiple layers that's fine, but we'll have to change it later.

Member Author

I changed the bottom drag term to loop only over the edges in the bottom layer. For the wind stress term I kept the loop over multiple layers, since that's what is done in MPAS-O, with a coefficient that distributes the stress depending on the vertical coordinate. Once we have a vertical coordinate we can do the same in Omega.
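
To make the bottom-layer-only structure concrete, here is a minimal standalone sketch (plain C++ with hypothetical names like MaxLevelEdgeBot and LayerThickEdge, not the actual Omega/Kokkos code) of a quadratic bottom drag applied only in the bottom-most active layer of each edge; the speed |u| is approximated by the normal velocity magnitude for brevity:

#include <cmath>
#include <cstddef>
#include <vector>

// Sketch only: apply -Cd * |u| * u / h at the bottom active layer of each edge.
void addBottomDragTendency(std::vector<std::vector<double>> &NormalVelTend,
                           const std::vector<std::vector<double>> &NormalVel,
                           const std::vector<std::vector<double>> &LayerThickEdge,
                           const std::vector<int> &MaxLevelEdgeBot,
                           double BottomDragCoeff) {
   for (std::size_t IEdge = 0; IEdge < NormalVelTend.size(); ++IEdge) {
      const int K    = MaxLevelEdgeBot[IEdge]; // bottom layer index for this edge
      const double U = NormalVel[IEdge][K];
      const double H = LayerThickEdge[IEdge][K];
      NormalVelTend[IEdge][K] -= BottomDragCoeff * std::fabs(U) * U / H;
   }
}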

@mark-petersen

mark-petersen commented May 6, 2025

Tested using the barotropic gyre in polaris, which has an exact solution. Here I am using the same initial condition file for both. Both have the same viscosity, RK4 time stepping, and the same dt. MPAS-Ocean is running the shallow water equations, with all vertical terms turned off. This is on a regular hex plane with 4200 cells.

Omega Year 4 snapshot
[image: 250502-mpaso-barotropic_gyre]
MPAS-Ocean Year 4 snapshot
[image: 250502-omega-barotropic_gyre]

I would say that this is resounding support that this PR is working properly. We can retest if the stencil is altered per @cbegeman's comment above. The stencil difference from MPAS-O might cause the slight differences in the plots.

I had to alter items by hand in omega.yml and in the omega polaris analysis step to get this all working. Thanks to @philipwjones and @brian-oneill for assistance with Omega.

I used gnu on perlmutter cpu, single node, for both. My run directories are here:

/pscratch/sd/m/mpeterse/runs/250502-mpaso-barotropic_gyre/ocean/planar/barotropic_gyre/default
/pscratch/sd/m/mpeterse/runs/250502-omega-barotropic_gyre/ocean/planar/barotropic_gyre/default
Notes on how I ran this test:

new polaris:
source load_dev_polaris_0.7.0-alpha.1_pm-cpu_gnu_mpich.sh
MPAS executable is:
ocean_model_250502_64046ec7_lo_gnu-cray_openmp

polaris list | grep gyre
  19: ocean/planar/barotropic_gyre/default

polaris setup -p $e/master/components/mpas-ocean -w $r/250502-mpaso-barotropic_gyre -n 19
Setting up tasks:
  ocean/planar/barotropic_gyre/default

test_omega_cpu/build/test

polaris setup -p $e/opr/components/omega -w $r/250502-omega-barotropic_gyre -n 19 --model omega

new run here: /pscratch/sd/m/mpeterse/runs/250502-omega-barotropic_gyre/ocean/planar/barotropic_gyre/default/short_forward
added
ln -isf /pscratch/sd/m/mpeterse/runs/test_omega_cpu/build/src/omega.exe .

Worked!
/pscratch/sd/m/mpeterse/runs/250502-omega-barotropic_gyre/ocean/planar/barotropic_gyre/default/short_forward
ln -isf init.nc OmegaMesh.nc
UsePointerFile: false

Omega has a critical error if a tracer (salinity in this case) is not in the init file. For this shallow water case, we simply removed all tracer lines from the omega.yml Tracers: section, and it ran to completion.

WindForcingTendencyEnable: true
BottomDragTendencyEnable: true
BottomDragCoeff: 0.001

Okay, in the omega.yml file, we need to remove - Base from the contents of the InitialState IOStream. Base refers to the base tracers, which are no longer defined since they were removed.

Also, the code needs a statement to read in the Forcing stream. I added Err = IOStream::read("Forcing", ModelClock, ReqMeta); to src/ocn/OceanInit.cpp after the InitialState stream is read in at line 182, and that seems to be enough to get it working. When I run and look at the output, it looks like it's actually doing stuff now, and not just producing NaNs and 0s.

Thanks! Rather than change the code, I added the wind to the init file:
ncks -A -v windStressZonal,windStressMeridional forcing.nc init.nc
and I can see it come out the other end now.

ncks -A -v windStressZonal,windStressMeridional forcing.nc init_with_wind.nc

pwd
/pscratch/sd/m/mpeterse/runs/250502-omega-barotropic_gyre/ocean/planar/barotropic_gyre/default/analysis
ln -isf ../long_forward/ocn.hist.0004-01-01_00\:00\:48.nc output.nc
ncrename -d time,Time ocn.hist.0004-01-01_00\:00\:48.nc output.nc
ncrename -d NVertLevels,nVertLevels output.nc
ncrename -v LayerThickness,layerThickness output.nc
ncrename -d NCells,nCells output.nc
ncrename -v NormalVelocity,normalVelocity output.nc
ncrename -d NEdges,nEdges output.nc
# note: on login node, not interactive. That hangs.
polaris serial

change extents:
pm:g:main$ pwd
/global/homes/m/mpeterse/repos/polaris/main
pm:g:main$ vi polaris/tasks/ocean/barotropic_gyre/analysis.py +131
#bounds = np.linspace(-eta0, eta0, 11)
bounds = np.linspace(-0.7, 0.7, 8)

@vanroekel
Collaborator

@mark-petersen, is the increase in error along the boundary in Omega resulting from the 'partial slip' boundary in MPAS-Ocean? I can't recall if the boundary conditions are identical in MPAS vs. Omega.

@xylar

xylar commented May 6, 2025

> I had to alter items by hand in omega.yml and in the omega polaris analysis step to get this all working.

@mark-petersen, I know you'll be away but would you have time to implement these changes in Polaris when you're back?

@cbegeman

cbegeman commented May 6, 2025

> I had to alter items by hand in omega.yml and in the omega polaris analysis step to get this all working.
>
> @mark-petersen, I know you'll be away but would you have time to implement these changes in Polaris when you're back?

I posted a comment along these lines on the relevant draft PR: E3SM-Project/polaris#286 (comment)

@mark-petersen

I tested the updated wind stencil in Omega that matches MPAS-O, and it made only a small difference. So the boundary treatment is now the main suspect for the difference between Omega and MPAS-O.

Omega with original wind stencil InterpType: Isotropic
[image]
Omega with new wind stencil InterpType: Anisotropic
[image]
MPAS-Ocean Year 4 snapshot
[image: comparison]

@mark-petersen

mark-petersen commented May 30, 2025

Thanks @brian-oneill for adding the correction to the boundary masking, which is here: https://github.com/brian-oneill/E3SM/tree/omega/wind-forcing-bdry-edits. Now Omega and MPAS-O wind-forced results are much closer.

Barotropic Gyre
Both use the anisotropic wind stress stencil and the same boundary masking. Images after 4 years.
Omega
[image]

MPAS-Ocean
[image: comparison]

Global wind-forced case
Both use the anisotropic wind stress stencil and the same boundary masking as above. This is after 30 days using the icos30 mesh.

Omega
[image: omega_ssh_day30]

MPAS-Ocean
[image: mpas_ssh_day30]

difference
[image: diff_ssh_day30]

@mark-petersen mark-petersen left a comment

Based on the successful results above, I think this is now complete.

@mark-petersen mark-petersen self-assigned this May 30, 2025
@mark-petersen

The plan is to merge this in, and then @brian-oneill will add the masking alterations in another PR to keep things clear. (see https://github.com/brian-oneill/E3SM/tree/omega/wind-forcing-bdry-edits)

@cbegeman cbegeman left a comment

Approving based on relatively small differences from MPAS-Ocean results and reasonable agreement with the analytic solution for a free slip boundary condition (expected increased drag associated with partial slip). Thanks, @mwarusz for your work on the code, @brian-oneill for prioritizing the masking, and @mark-petersen for testing.

@mark-petersen

The images above are snapshots. Here I am comparing averages over the month of December of year 4.

Omega with Anisotropic wind, and boundary mask fix.
[image]

MPAS-Ocean
[image]

So the differences also show up in a month-long average. (Omega has daily samples because I had to write them to disk; MPAS-Ocean has 10-minute samples)

@katsmith133 katsmith133 left a comment

Approving based upon visual inspection and comparisons made by @mark-petersen between MPAS-O and Omega.

Collaborator

@vanroekel vanroekel left a comment

Approving based on visual inspection of code and testing from @mark-petersen, differences are small enough to proceed. Thanks @mwarusz and @brian-oneill!

@philipwjones

@mark-petersen I fixed the errors and conflicts from the update to latest develop. Passes the unit tests on Chrysalis - all the changes were associated with the changes to Config interfaces and shouldn't impact anything else.

@mark-petersen

Retested head. Passed all tests on Perlmutter CPU compiler=gnu and GPU compiler=gnugpu; Frontier compiler=craycray CPU and compiler=craycray-mphipcc GPU, except that Frontier CPU failed on:

27/31 Test #27: TIMESTEPPER_TEST ...................***Failed    0.72 sec
0: [2025-06-05 10:42:24.697] [info] [TimeStepperTest.cpp:416] ----- Time Stepper Unit Test -----
0: non-void function did not return a value
0: Kokkos contract violation:
0:     Asserted condition `(Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enabled())` evaluated false.
0: Error at "/ccs/home/mpetersen/repos/E3SM/opr/externals/ekat/extern/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp":638

This error did not occur on the head of develop. My build and run sequence is the same as in my earlier comment above (Frontier cpu/gpu, Perlmutter CPU/GPU), with one addition for the Frontier cpu build: append the following to cime_config/machines/cmake_macros/craycray_frontier.cmake

string(APPEND CMAKE_CXX_FLAGS_RELEASE "-O3")

* 8ab4d7b7d5 (HEAD -> opr) Add -O3 to Frontier craycray build
* 41f184ef57 timers (from Kieran for halo)
* 8cd2268d18 (HEAD -> opr) Add -O3 to Frontier craycray build 250605


@grnydawn

grnydawn commented Jun 5, 2025

@mark-petersen, I was able to reproduce the failure with the TIMESTEPPER test on Frontier using the craycray CPU compiler. I also narrowed down the error to WindForcingAuxVars.h. The error disappeared when I set ZonalStressEdge and MeridStressEdge to constant values as shown below. I took a quick look at Interp (the InterpCellToEdge type), but haven't found anything further yet.

class WindForcingAuxVars {
 public:
   Array1DReal NormalStressEdge;
   Array1DReal ZonalStressCell;
   Array1DReal MeridStressCell;
   InterpCellToEdgeOption InterpChoice;

   WindForcingAuxVars(const std::string &AuxStateSuffix, const HorzMesh *Mesh);

   KOKKOS_FUNCTION void computeVarsOnEdge(int IEdge) const {
//      const Real ZonalStressEdge = Interp(IEdge, ZonalStressCell, InterpChoice);
//      const Real MeridStressEdge = Interp(IEdge, MeridStressCell, InterpChoice);
      const Real ZonalStressEdge = 0.1;
      const Real MeridStressEdge = 0.1;

      NormalStressEdge(IEdge) =
          Kokkos::cos(AngleEdge(IEdge)) * ZonalStressEdge +
          Kokkos::sin(AngleEdge(IEdge)) * MeridStressEdge;
   }

   void registerFields(const std::string &AuxGroupName,
                       const std::string &MeshName) const;
   void unregisterFields() const;

 private:
   InterpCellToEdge Interp;
   Array2DI4 CellsOnEdge;
   Array1DReal AngleEdge;
};

@brian-oneill

Here's the issue:

class InterpCellToEdge {
 public:
   InterpCellToEdge(const HorzMesh *Mesh);

   KOKKOS_FUNCTION Real operator()(int IEdge, const Array1DReal &ArrayCell,
                                   InterpCellToEdgeOption Option) const {
      switch (Option) {
      case InterpCellToEdgeOption::Anisotropic:
         return interpolateAnisotropic(IEdge, ArrayCell);
      case InterpCellToEdgeOption::Isotropic:
         return interpolateIsotropic(IEdge, ArrayCell);
      }
   };

In InterpCellToEdge, the functor doesn't return a value if the config option isn't set. It doesn't have a default value, and the option isn't getting read from the Config during the TIMESTEPPER test, hence the "non-void function did not return a value" error.
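
One straightforward way to make the functor total is to give the switch a default path, for example falling back to the isotropic variant (a sketch only, not necessarily the fix that was actually pushed):

   KOKKOS_FUNCTION Real operator()(int IEdge, const Array1DReal &ArrayCell,
                                   InterpCellToEdgeOption Option) const {
      switch (Option) {
      case InterpCellToEdgeOption::Anisotropic:
         return interpolateAnisotropic(IEdge, ArrayCell);
      case InterpCellToEdgeOption::Isotropic:
      default: // covers an option that was never read from the Config
         return interpolateIsotropic(IEdge, ArrayCell);
      }
   }

Reading the option with an explicit default value (or aborting on an unknown value) would address the same problem at the Config level.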

@mark-petersen mark-petersen force-pushed the omega/wind-forcing-and-bottom-drag branch from 5032675 to 45f937c Compare June 10, 2025 13:52
@mark-petersen

With the previous addition from @brian-oneill, this now passes all tests on Perlmutter CPU compiler=gnu and GPU compiler=gnugpu; Frontier compiler=craycray CPU and compiler=craycray-mphipcc GPU. Thanks, that took care of the problem.

@mark-petersen mark-petersen merged commit 9bb4fa7 into E3SM-Project:develop Jun 12, 2025
1 of 3 checks passed